package com.leadingsoft.controller.parse;

import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.beetl.sql.core.SQLManager;
import org.cef.browser.CefBrowser;
import org.cef.callback.CefStringVisitor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.leadingsoft.common.Constant;
import com.leadingsoft.common.TaskQuene;
import com.leadingsoft.common.model.Hotel;
import com.leadingsoft.common.model.HotelLog;
import com.leadingsoft.controller.parse.impl.GetCtripHotel;

/**
 * Example HTML parser: a {@link CefStringVisitor} that extracts Ctrip hotel
 * list data from the text handed to {@link #visit(String)}.
 * <p>
 * For each visit the URL currently reported by the browser is looked up in the
 * {@link HotelLog} table. If no record exists, the text is parsed as HTML, the
 * elements carrying the configured CSS class are converted to objects by
 * {@link GetCtripHotel}, stored as {@link Hotel} rows, and the URL is recorded
 * in the log table. Afterwards the next list URL is requested from
 * {@link TaskQuene}; it is queued when present, otherwise the browser is
 * closed.
 *
 * @author gongym
 * @date 2018-06-09 22:30:09
 */
public class ParsingCtripHotel implements CefStringVisitor {
	private static final Logger logger = LoggerFactory.getLogger(ParsingCtripHotel.class);
	private final CefBrowser browser;
	private final String listSelector;
	private final SQLManager sqlManager;
	private final Long startIndex;
	private final Long stopIndex;

	/**
	 * Creates a visitor bound to one browser instance.
	 *
	 * @param browser      browser whose current URL is read and which is closed
	 *                     when no next URL is available
	 * @param listSelector CSS class name used to select the list elements
	 * @param sqlManager   BeetlSQL manager used for the log lookup and inserts
	 * @param startIndex   passed through to
	 *                     {@code TaskQuene.getCtripHotelListUrl}
	 * @param stopIndex    passed through to
	 *                     {@code TaskQuene.getCtripHotelListUrl}
	 */
	public ParsingCtripHotel(CefBrowser browser, String listSelector, SQLManager sqlManager, Long startIndex,
			Long stopIndex) {
		this.browser = browser;
		this.listSelector = listSelector;
		this.sqlManager = sqlManager;
		this.startIndex = startIndex;
		this.stopIndex = stopIndex;
	}

	/**
	 * Processes the given page text for the browser's current URL and then
	 * schedules the next page (or closes the browser when there is none).
	 *
	 * @param string text to parse as HTML; only parsed when the current URL has
	 *               no {@link HotelLog} record yet
	 * @throws NumberFormatException if at least one {@link Hotel} was parsed and
	 *                               the URL with the list-URL template removed is
	 *                               not a valid {@code long}
	 */
	@Override
	public void visit(String string) {
		final String url = browser.getURL();
		logger.debug("开始解析当前URL：{}", url);
		// The HTML is only parsed when the URL is new, so skipped pages cost no parsing work.
		if (!isAlreadySpidered(url)) {
			logger.debug("当前页面没有被解析过，调用解析方法进行解析");
			parseAndStore(url, string);
		}
		logger.debug("当前页面已经解析完毕，开始获下一页URL");
		scheduleNextPage(url);
	}

	/** Returns whether a {@link HotelLog} row with this spidered URL already exists. */
	private boolean isAlreadySpidered(String url) {
		HotelLog probe = new HotelLog();
		probe.setSpideredUrl(url);
		return sqlManager.templateOne(probe) != null;
	}

	/** Parses the HTML, stamps each hotel with the page index, and persists hotels plus the URL log row. */
	private void parseAndStore(String url, String html) {
		Document htmlDocument = Jsoup.parse(html);
		Elements listElements = htmlDocument.getElementsByClass(listSelector);
		List<Object> hotelList = new GetCtripHotel().elementsToObjects(listElements);
		// What remains of the URL after removing the list-URL template is used as the page index.
		String pageIndex = StringUtils.remove(url, Constant.CTRIPHOTELLISTUTLTEMP);
		// Parse the index once, and only when a Hotel is actually present, so that
		// a page without hotels never triggers a NumberFormatException.
		Long pageNumber = null;
		for (Object item : hotelList) {
			if (item instanceof Hotel) {
				if (pageNumber == null) {
					pageNumber = Long.valueOf(pageIndex);
				}
				((Hotel) item).setPageIndex(pageNumber);
			}
		}
		logger.debug("当前页面解析成功，当前URL为：{}，当前页为：{}，共获取到：{}条数据", url, pageIndex, hotelList.size());
		sqlManager.insertBatch(Hotel.class, hotelList);
		logger.debug("保存解析列表数据到数据库成功");
		// Record the URL so later visits of the same page skip parsing.
		HotelLog spideredLog = new HotelLog();
		spideredLog.setSpideredUrl(url);
		sqlManager.insert(spideredLog);
		logger.debug("插入当前URL到日志表");
	}

	/** Queues the next list URL when one is returned; otherwise closes the browser. */
	private void scheduleNextPage(String url) {
		String nextUrl = TaskQuene.getCtripHotelListUrl(url, startIndex, stopIndex);
		if (StringUtils.isEmpty(nextUrl)) {
			// No further page: the crawl is finished, so close the browser.
			logger.debug("未获取到下一页URL，抓取任务结束");
			browser.close();
			return;
		}
		TaskQuene.ctripTaskUrl.add(nextUrl);
		logger.debug("获取到下一页URL为：{}", nextUrl);
		logger.debug("当前队列中的URL为：{}", TaskQuene.ctripTaskUrl);
		// The task that just finished is the current URL, not the one that was just queued.
		logger.debug("===================={}任务结束====================", url);
	}
}
